#--------------------------------------------------------------------------------------------------------------------#
#------------------------------------------------------ Lea Portmann ------------------------------------------------#
#----------------------------------------------------- September 2020 -----------------------------------------------#
#--------- What Makes a Successful Candidate? Political Experience and Low-Information Cues in Elections ------------#
#-------------------------------------------------- Further analyses ------------------------------------------------#
#--------------------------------------------------------------------------------------------------------------------#

library(xtable)
library(dplyr)
library(statmod)
library(ggplot2)
library(gamlss)
library(lme4)
library(texreg)
library(dplyr)
library(ggpubr)
library(reshape2)
library(xtable)
library(broom)

# ---------------------------------------------------------------------- Read data

# NOTE(review): clearing the workspace and hard-coding the working directory
# are kept because load() below resolves "d3" relative to the working
# directory. Replace the placeholder path before running.
rm(list = ls())
setwd(".../...")

# Restore the objects saved in the file "d3"; the rest of the script expects
# this to create a data frame called `d3`.
load("d3")

# All subsets below use which(): it keeps only rows where the condition is
# TRUE, so an NA in the filter column drops the row instead of injecting an
# all-NA phantom row (which plain logical indexing would do).

# Drop incumbent candidates
d3_m <- d3[which(d3$incumbent == 0), ]

# Drop observations whose "negative preference votes" value is zero
# (the original note calls these missing values -- presumably zero encodes
# missing here; TODO confirm against the codebook)
d3_mn <- d3_m[which(d3_m$n_pv != 0), ]

# Drop observations whose "positive preference votes" value is zero
# (same presumed zero-as-missing convention as above)
d3_mp <- d3_m[which(d3_m$pos_rel != 0), ]

# ---------------------------------------------------------------------- Descriptive statistics

# Build a Min/Max/Mean/SD table (rounded to 2 digits) for numeric columns.
#
# data        data frame holding the columns
# vars        character vector of column names, one table row per name
# row_labels  character vector of row names, same order as `vars`
#
# Returns a numeric matrix with one row per variable and the columns
# "Min", "Max", "Mean", "SD". NAs are not removed, so a column containing
# NA yields NA statistics.
summarise_numeric_vars <- function(data, vars, row_labels) {
  stopifnot(length(vars) == length(row_labels))
  # vapply() returns a 4 x length(vars) matrix (one column per variable) and
  # errors if any statistic is not a single number.
  stats_by_var <- vapply(
    vars,
    function(v) {
      values <- data[[v]]
      c(min(values), max(values), mean(values), sd(values))
    },
    numeric(4)
  )
  out <- round(t(stats_by_var), 2)
  dimnames(out) <- list(row_labels, c("Min", "Max", "Mean", "SD"))
  out
}

#Table dependent variables

a <- c("n_pv", "pos_rel")

T_DV <- summarise_numeric_vars(
  d3_m, a,
  c("Negative preference votes", "Positive preference votes")
)
xtable(T_DV)

#Table independent variables (numeric)

b <- c("relpos", "num_cand")

T_EV <- summarise_numeric_vars(
  d3_m, b,
  c("Relative position on ballot", "Number of candidates on ballot")
)
xtable(T_EV)

#Table independent variables (factors)

###Factors

# Frequency block for one categorical variable.
#
# data     data frame holding the variable
# var      name of the column to tabulate (character scalar)
# header   label written into the block's first row
# drop_na  if TRUE, rows with NA in `var` are removed before counting;
#          otherwise NA is counted as its own category
#
# Returns a character matrix with the columns "Variable", "n", "freq":
# a header row (label, "", "") followed by one row per category holding the
# count and the percentage share rounded to 2 digits.
freq_block <- function(data, var, header, drop_na = FALSE) {
  values <- data %>%
    dplyr::ungroup() %>%
    dplyr::select(Variable = dplyr::all_of(var))
  if (drop_na) {
    values <- na.omit(values)
  }
  counts <- values %>%
    dplyr::group_by(Variable) %>%
    dplyr::summarize(n = dplyr::n()) %>%
    dplyr::mutate(freq = round((n / sum(n)) * 100, digits = 2))
  # Every block is converted to a matrix before the header row is bound on,
  # so all blocks share the same coercion path and stack cleanly.
  rbind(x = c(header, "", ""), as.matrix(counts))
}

# Column header for the printed tables. xtable() has no `col.names` or
# `format` argument (it silently ignores them), so the header is set on the
# matrix itself.
freq_header <- c("Variable", "n", "Share (%)")

AT_2b <- freq_block(d3_m, "nonswissname", "Non-Swiss name", drop_na = TRUE)
AT_2c <- freq_block(d3_m, "precumulated", "Pre-cumulation")
AT_2d <- freq_block(d3_m, "prof_c3", "Profession")
AT_2e <- freq_block(d3_m, "sex_f", "Gender")
AT_2f <- freq_block(d3_m, "age_c3", "Age")
AT_2g <- freq_block(d3_m, "nonelec_d_eff", "Nonelective office")
AT_2h <- freq_block(d3_m, "poloff_d", "Political office")
AT_2i <- freq_block(d3_m, "party_c4", "Party")

AT_2 <- rbind(AT_2b, AT_2c, AT_2d, AT_2e, AT_2f, AT_2g, AT_2h, AT_2i)
colnames(AT_2) <- freq_header

print(xtable(AT_2, caption = "Descriptive statistics factor variables"),
      include.rownames = FALSE)

#Table treatment variables

AT_3a <- freq_block(d3_m, "TApoloff", "Political office on ballot")
AT_3b <- freq_block(d3_m, "TAnonelec", "Nonelective office on ballot")

AT_3 <- rbind(AT_3a, AT_3b)
colnames(AT_3) <- freq_header

print(xtable(AT_3, caption = "Descriptive statistics variables capturing information on the ballot"),
      include.rownames = FALSE)

# ---------------------------------------------------------------------- Table ballot information by social cues

# NOTE(review): if these columns are stored as factors, as.numeric() returns
# the internal level codes (1, 2, ...) rather than the original 0/1 values --
# confirm the storage type in d3.
d3_m$poloff_d <- as.numeric(d3_m$poloff_d)
d3_m$nonelec_d <- as.numeric(d3_m$nonelec_d)

# Cross-tabulate a social-cue variable against an office indicator.
#
# data        data frame holding both variables
# group_var   name of the social-cue column (table rows)
# office_var  name of the office indicator column (table columns)
# col_labels  column names of the result: the row-variable label followed by
#             one label per category of `office_var`
#
# Returns a data frame with two rows per category of `group_var`: the counts,
# directly followed by the within-category row percentages formatted as
# "(xx.x%)". Because counts and formatted percentages are stacked, all cell
# columns are character.
office_crosstab <- function(data, group_var, office_var, col_labels) {
  # .groups = "drop_last" leaves the result grouped by `group_var`, so that
  # sum(n) below is the total within each social-cue category (row shares).
  cell_stats <- data %>%
    dplyr::ungroup() %>%
    dplyr::group_by(dplyr::across(dplyr::all_of(c(group_var, office_var)))) %>%
    dplyr::summarize(n = dplyr::n(), .groups = "drop_last") %>%
    dplyr::mutate(Percent = (n / sum(n) * 100))
  cell_stats$Percent <- paste0("(", round(cell_stats$Percent, 1), "%)")

  layout <- stats::reformulate(office_var, response = group_var)
  wide_n <- dcast(cell_stats, layout, value.var = "n")
  wide_perc <- dcast(cell_stats, layout, value.var = "Percent")

  # Fail loudly if the office indicator has an unexpected extra category
  # (e.g. NA); otherwise the labels below would be misaligned.
  stopifnot(ncol(wide_n) == length(col_labels))

  # Sorting on the row variable puts each percentage row right below its
  # count row (ties keep their original order: counts first).
  stacked <- rbind(wide_n, wide_perc) %>%
    dplyr::arrange(.data[[group_var]])
  colnames(stacked) <- col_labels
  stacked
}

# Column labels shared by all four balance tables.
poloff_cols <- c("No political office", "Political office")
# NOTE(review): `nonelec_d` is labelled "Nonelective office" in the
# descriptive tables of this script, so "Elective office" here looks like a
# mislabel -- confirm against the published table before changing it.
nonelec_cols <- c("No elective office", "Elective office")

# ----- Non-Swiss name

d3_m$nonswissname <- factor(d3_m$nonswissname,
                            levels = c(0, 1),
                            labels = c("Swiss name", "Non-Swiss name"))

nonswiss_wide_poloff <- office_crosstab(d3_m, "nonswissname", "poloff_d",
                                        c("Non-Swiss name", poloff_cols))
nonswiss_wide_nonelect <- office_crosstab(d3_m, "nonswissname", "nonelec_d",
                                          c("Non-Swiss name", nonelec_cols))

# Column 4 is the repeated row-variable column of the second table.
nonswissname_balance <- cbind(nonswiss_wide_poloff, nonswiss_wide_nonelect)
nonswissname_balance <- nonswissname_balance[, -4]

print(xtable(as.matrix(nonswissname_balance), caption = "Candidate name (Swiss, non-Swiss) and political office, number of candidates and share"), include.rownames=FALSE)

# ----- Age

d3_m$age_c3 <- factor(d3_m$age_c3,
                      levels = c(0, 1, 2),
                      labels = c("18-30 years", "31-50 years", "50+ years"))

age_wide_poloff <- office_crosstab(d3_m, "age_c3", "poloff_d",
                                   c("Age", poloff_cols))
age_wide_poloff

age_wide_nonelect <- office_crosstab(d3_m, "age_c3", "nonelec_d",
                                     c("Age", nonelec_cols))

age_balance <- cbind(age_wide_poloff, age_wide_nonelect)
age_balance <- age_balance[, -4]

print(xtable(as.matrix(age_balance), caption = "Candidate age and political office, number of candidates and share"), include.rownames=FALSE)

# ----- Female

# Raw codes before relabelling, as a sanity check
table(d3_m$sex_f)

d3_m$sex_f <- factor(d3_m$sex_f,
                     levels = c(0, 1),
                     labels = c("Male", "Female"))

sex_wide_poloff <- office_crosstab(d3_m, "sex_f", "poloff_d",
                                   c("Gender", poloff_cols))
sex_wide_poloff

sex_wide_nonelect <- office_crosstab(d3_m, "sex_f", "nonelec_d",
                                     c("Gender", nonelec_cols))

sex_balance <- cbind(sex_wide_poloff, sex_wide_nonelect)
sex_balance <- sex_balance[, -4]

print(xtable(as.matrix(sex_balance), caption = "Candidate gender and political office, number of candidates and share"), include.rownames=FALSE)

# ----- Profession

# Raw codes before relabelling, as a sanity check
table(d3_m$prof_c3)
d3_m$prof_c3 <- factor(d3_m$prof_c3,
                       levels = c(1, 2, 3),
                       labels = c("high-skilled", "medium-skilled", "low-skilled"))

prof_wide_poloff <- office_crosstab(d3_m, "prof_c3", "poloff_d",
                                    c("Profession", poloff_cols))
prof_wide_poloff

prof_wide_nonelect <- office_crosstab(d3_m, "prof_c3", "nonelec_d",
                                      c("Profession", nonelec_cols))

prof_balance <- cbind(prof_wide_poloff, prof_wide_nonelect)
prof_balance <- prof_balance[, -4]

print(xtable(as.matrix(prof_balance), caption = "Candidate profession (skill level) and political office, number of candidates and share"), include.rownames=FALSE)


# ---------------------------------------------------------------------- Distribution positive preference votes

# ----- Model

# Linear mixed model of pos_rel on the candidate and ballot covariates, with
# a random intercept for each list_id, fitted on d3_m.
m1 <- lmer(
  formula = pos_rel ~ nonswissname + relpos_c + precumulated +
    factor(prof_c3) + sex_f + age_c3 + party_c4 + num_cand_c +
    poloff_d + nonelec_d_eff + (1 | list_id),
  data = d3_m
)
summary(m1)

# ----- Distribution dependent variable

# Fix the seed so the simulated reference distribution, and hence the figure,
# is identical on every run.
set.seed(2020)

# 1000 draws from an inverse Gaussian distribution, used as a visual
# reference for the shape of the dependent variable.
invgauss <- data.frame(invgauss = rinvgauss(1000, mean = 3, shape = 0.8))

# NOTE(review): the observed density is drawn from `d3`, whereas the model
# above is fitted on `d3_m` -- confirm which sample the figure should show.
dist_dv_pos <- ggplot(invgauss, aes(invgauss)) +
  geom_density(fill = "grey50", alpha = 0.5) +
  geom_density(data = d3, aes(pos_rel), fill = "black", alpha = 0.5) +
  theme_bw() +
  labs(x = "Positive preference votes/Inverse Gaussian Distribution", y = "Density")
dist_dv_pos

# ----- Residuals

# Fitted values and residuals are taken straight from the model. This
# replaces broom::augment(), which has no method for lme4 fits in current
# broom releases (those tidiers live in broom.mixed, which is not loaded).
resid_df <- data.frame(.fitted = fitted(m1), .resid = resid(m1))

dv_pos_red <- ggplot(resid_df, aes(x = .fitted, y = .resid)) +
  geom_point(color = "black") +
  labs(x = "Fitted values", y = "Residuals") +
  theme_bw()

# Write the residual plot to disk (8 x 5 inches at 300 dpi).
jpeg(file = "residuals_pos.jpeg",
     width = 8, height = 5, units = "in", res = 300)
plot(dv_pos_red)
dev.off()

# Density and residual plots side by side.
dv_pos <- ggarrange(dist_dv_pos, dv_pos_red,
                    labels = c("", ""),
                    ncol = 2, nrow = 1)
dv_pos


